From f683335fb0d06aef7170fac523ae2ab83173a7de Mon Sep 17 00:00:00 2001 From: Loren Merritt Date: Thu, 25 Apr 2013 17:50:53 +0000 Subject: [PATCH] Optimize gamma correction Switch from Chebyshev polynomial to Newton's method, which is both simpler and 1.5x faster for the same precision. --- babl/base/pow-24.c | 205 +++++++++++---------------------------------- 1 file changed, 51 insertions(+), 154 deletions(-) diff --git a/babl/base/pow-24.c b/babl/base/pow-24.c index 50d8d87..401a102 100644 --- a/babl/base/pow-24.c +++ b/babl/base/pow-24.c @@ -1,5 +1,5 @@ /* babl - dynamically extendable universal pixel conversion library. - * Copyright (C) 2012, Red Hat, Inc. + * Copyright (C) 2013 Loren Merritt * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public @@ -16,170 +16,67 @@ * . */ -#include #include #include "util.h" -/** - * This implements pow(x, 2.4) and pow(x, 1/2.4) using a chebyshev based polynominal approximation. - * This is based on the approach in: - * http://stackoverflow.com/questions/6475373/optimizations-for-pow-with-const-non-integer-exponent/6478839#6478839 +/* a^b = exp(b*log(a)) * - * The code includes checbychev constants for the 9th order, which seems to give a max error - * of about 4e-09, but unless you define USE_MAX_POW24_ACCURACY the order has been limited to - * give an error of about 1e-7 (i.e. single precission floats). - */ - -/* Chebychev polynomial terms for x^(5/12) expanded around x=1.5 - * Non-zero terms calculated via - * NIntegrate[(2/Pi)*ChebyshevT[N,u]/Sqrt [1-u^2]*((u+3)/2)^(5/12),{u,-1,1}, PrecisionGoal->20, WorkingPrecision -> 100] - * Zeroth term is similar except it uses 1/pi rather than 2/pi. + * Extracting the exponent from a float gives us an approximate log. + * Or better yet, reinterpret the bitpattern of the whole float as an int. + * + * However, the output values of 12throot vary by less than a factor of 2 + * over the domain we care about, so we only get log() that way, not exp(). + * + * Approximate exp() with a low-degree polynomial; not exactly equal to the + * Taylor series since we're minimizing maximum error over a certain finite + * domain. It's not worthwhile to use lots of terms, since Newton's method + * has a better convergence rate once you get reasonably close to the answer. */ -static const double Cn[] = { - 1.1758200232996901923, - 0.16665763094889061230, - -0.0083154894939042125035, - 0.00075187976780420279038, - -0.000083240178519391795367, - 0.000010229209410070008679, - -1.3401001466409860246e-6, - 1.8333422241635376682e-7, - -2.5878596761348859722e-8 -}; - - -/* Returns x^(/12) for x in [1,2) */ -static double pow512norm (double x) +static inline double +init_newton (double x, double exponent, double c0, double c1, double c2) { - double Tn[9]; - double u; - - u = 2.0*x - 3.0; - Tn[0] = 1.0; - Tn[1] = u; - Tn[2] = 2*u*Tn[2-1] - Tn[2-2]; - Tn[3] = 2*u*Tn[3-1] - Tn[3-2]; - Tn[4] = 2*u*Tn[4-1] - Tn[4-2]; - Tn[5] = 2*u*Tn[5-1] - Tn[5-2]; - Tn[6] = 2*u*Tn[6-1] - Tn[6-2]; -#ifdef USE_MAX_POW24_ACCURACY - Tn[7] = 2*u*Tn[7-1] - Tn[7-2]; - Tn[8] = 2*u*Tn[8-1] - Tn[8-2]; -#endif - - return Cn[0]*Tn[0] + Cn[1]*Tn[1] + Cn[2]*Tn[2] + Cn[3]*Tn[3] + Cn[4]*Tn[4] + Cn[5]*Tn[5] + Cn[6]*Tn[6] -#ifdef USE_MAX_POW24_ACCURACY - + Cn[7]*Tn[7] + Cn[8]*Tn[8] -#endif - ; + int iexp; + double y = frexp(x, &iexp); + y = 2*y+(iexp-2); + c1 *= M_LN2*exponent; + c2 *= M_LN2*M_LN2*exponent*exponent; + return y = c0 + c1*y + c2*y*y; } -/* Precalculated (2^N) ^ (5 / 12) */ -static const double pow2_512[12] = { - 1.0, - 1.3348398541700343678, - 1.7817974362806785482, - 2.3784142300054420538, - 3.1748021039363991669, - 4.2378523774371812394, - 5.6568542494923805819, - 7.5509945014535482244, - 1.0079368399158985525e1, - 1.3454342644059433809e1, - 1.7959392772949968275e1, - 2.3972913230026907883e1 -}; - - -/* Returns x^(1/2.4) == x^(5/12) */ -double babl_pow_1_24 (double x) -{ - double s; - int iexp; - div_t qr; - - s = frexp (x, &iexp); - s *= 2.0; - iexp -= 1; - - qr = div (iexp, 12); - if (qr.rem < 0) { - qr.quot -= 1; - qr.rem += 12; - } - - return ldexp (pow512norm (s) * pow2_512[qr.rem], 5 * qr.quot); -} - -/* Chebychev polynomial terms for x^(7/5) expanded around x=1.5 - * Non-zero terms calculated via - * NIntegrate[(2/Pi)*ChebyshevT[N,u]/Sqrt [1-u^2]*((u+3)/2)^(7/5),{u,-1,1}, PrecisionGoal->20, WorkingPrecision -> 100] - * Zeroth term is similar except it uses 1/pi rather than 2/pi. +/* Returns x^2.4 == (x*(x^(-1/5)))^3, using Newton's method for x^(-1/5). + * Max absolute error is 4e-8. + * One more iteration of Newton would bring error down to 4e-15. + * + * (The above measurements apply to the input range can that be involved in + * gamma correction. Accuracy for inputs very close to 0 is somewhat worse, + * but that will hit the linear part of the gamma curve rather than call these. + * Out-of-range pixels >1.3 are also somewhat worse.) */ -static const double iCn[] = { - 1.7917488588043277509, - 0.82045614371976854984, - 0.027694100686325412819, - -0.00094244335181762134018, - 0.000064355540911469709545, - -5.7224404636060757485e-6, - 5.8767669437311184313e-7, - -6.6139920053589721168e-8, - 7.9323242696227458163e-9 -}; - -/* Returns x^(7/5) for x in [1,2) */ -static double pow75norm (double x) +double +babl_pow_24 (double x) { - double Tn[9]; - double u; - - u = 2.0*x - 3.0; - Tn[0] = 1.0; - Tn[1] = u; - Tn[2] = 2*u*Tn[2-1] - Tn[2-2]; - Tn[3] = 2*u*Tn[3-1] - Tn[3-2]; - Tn[4] = 2*u*Tn[4-1] - Tn[4-2]; - Tn[5] = 2*u*Tn[5-1] - Tn[5-2]; -#ifdef USE_MAX_POW24_ACCURACY - Tn[6] = 2*u*Tn[6-1] - Tn[6-2]; - Tn[7] = 2*u*Tn[7-1] - Tn[7-2]; - Tn[8] = 2*u*Tn[8-1] - Tn[8-2]; -#endif - - return iCn[0]*Tn[0] + iCn[1]*Tn[1] + iCn[2]*Tn[2] + iCn[3]*Tn[3] + iCn[4]*Tn[4] + iCn[5]*Tn[5] -#ifdef USE_MAX_POW24_ACCURACY - + iCn[6]*Tn[6] + iCn[7]*Tn[7] + iCn[8]*Tn[8] -#endif - ; + double y = init_newton (x, -1./5, 0.9953189663, 0.9594345146, 0.6742970332); + int i; + for (i = 0; i < 2; i++) + y = (1.+1./5)*y - ((1./5)*x*(y*y))*((y*y)*(y*y)); + x *= y; + return x*x*x; } -/* Precalculated (2^N) ^ (7 / 5) */ -static const double pow2_75[5] = { - 1.0, - 2.6390158215457883983, - 6.9644045063689921093, - 1.8379173679952558018e+1, - 4.8502930128332728543e+1 -}; - -/* Returns x^2.4 == x * x ^1.4 == x * x^(7/5) */ -double babl_pow_24 (double x) +/* Returns x^(1/2.4) == x*(x^(-1/12))^7, using Newton's method for x^(-1/12). + * Max absolute error is 7e-8 + * One more iteration of Newton would bring error down to 4e-14. + */ +double +babl_pow_1_24 (double x) { - double s; - int iexp; - div_t qr; - - s = frexp (x, &iexp); - s *= 2.0; - iexp -= 1; - - qr = div (iexp, 5); - if (qr.rem < 0) { - qr.quot -= 1; - qr.rem += 5; - } - - return x * ldexp (pow75norm (s) * pow2_75[qr.rem], 7 * qr.quot); + double y = init_newton (x, -1./12, 0.9976740658, 0.9885035151, 0.5907708377); + int i; + for (i = 0; i < 2; i++) + { + double z = (y*y)*(y*y); + z = (y*z)*(z*z); + y = (1.+1./12)*y - (1./12)*x*z; + } + return ((x*y)*(y*y))*((y*y)*(y*y)); } - -- 2.30.2